{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 问题： 我们要按照特定的文本模式匹配或查找\n",
    "# 解决:str.find(),str.endswith,str.startwith()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "False\nTrue\nFalse\n10\n"
     ]
    }
   ],
   "source": [
    "text = 'yeah, but no, but yeah, but no, but yeah'\n",
    "# Exact math\n",
    "print(text == 'yeah')\n",
    "\n",
    "# Match at start or end\n",
    "print(text.startswith('yea'))\n",
    "print(text.endswith('butyeah'))\n",
    "\n",
    "# Search for the location of the first occurrence\n",
    "print(text.find('no'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n"
     ]
    }
   ],
   "source": [
    "# 更复杂的匹配则需要正则表达式\n",
    "import re\n",
    "text1 = '11/27/2012'\n",
    "text2 = 'Nov 27, 2012'\n",
    "\n",
    "# Simple matching: \\d+ means match one or more digits\n",
    "print(bool(re.match(r'\\d+/\\d+/\\d+', text1)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n"
     ]
    }
   ],
   "source": [
    "# 如果针对同一种模式做多次匹配，那么通常先将正则表达式模式预编译成一个模式对象。\n",
    "datepat = re.compile(r'\\d+/\\d+/\\d+')\n",
    "print(bool(datepat.match(text1)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['11/27/2012', '3/13/2013']\n"
     ]
    }
   ],
   "source": [
    "# match 方法总是尝试在字符串开头找到匹配项。\n",
    "# 如果想针对整个文本搜索出所有匹配项，那么该使用findall()方法。\n",
    "text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'\n",
    "print(datepat.findall(text))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n('11', '27', '2012')\n"
     ]
    }
   ],
   "source": [
    "# 当定义正则表达式时，我们常会将部分模式用括号包起来的方式引入捕获组。\n",
    "datepat = re.compile(r'(\\d+)/(\\d+)/(\\d+)')\n",
    "\n",
    "# 捕获组通常能简化后续对匹配文本的处理，因为每个组的内容都可以单独提取出来。\n",
    "m = datepat.match('11/27/2012/11/27/2012')\n",
    "#全部\n",
    "# print(m.group(0))\n",
    "# 小组第一个\n",
    "# print(m.group(1))\n",
    "\n",
    "# 测试\n",
    "print(m.group() == m.group(0))\n",
    "print(m.groups())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('11', '27', '2012'), ('3', '13', '2013')]\n"
     ]
    }
   ],
   "source": [
    "# Find all  matches (notice splitting into tuples)\n",
    "text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'\n",
    "m = datepat.findall(text)\n",
    "print(m)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('11', '27', '2012')\n--------------------------\n('3', '13', '2013')\n--------------------------\n"
     ]
    }
   ],
   "source": [
    "for m in datepat.finditer(text):\n",
    "    print(m.groups())\n",
    "    print('--------------------------')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 小结： \n",
    "# 1. 指定模式时，会使用原始字符串，如： r'(\\d+)/(\\d+)/(\\d+)'\n",
    "#    这样的字符串中反斜线不会字符转义。\n",
    "#    否则要用双反斜线。\n",
    "\n",
    "\n",
    "# 2. 要精确匹配则要在结尾添加（$）标记。\n",
    "\n",
    "# 3.如果只是简单的匹配或者查找可以不预编译。如果是多次大量则需要预编译，这样可以省去重复预编译的步骤提升性能。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    ""
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2.0
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}